In this project, I used Python and Keras with the TensorFlow backend to classify German traffic signs.
I used the German Traffic Sign Dataset, which contains more than 50,000 images of 43 classes of German traffic signs.
I reached a validation accuracy of 97.30% and a test accuracy of 94.66%.
The dataset can be downloaded from traffic-signs-data. The download contains the three pickle files train.p, valid.p, and test.p.
Each of the three pickle files contains a dictionary with four key/value pairs:
- 'features' is a 4D array containing the raw pixel data of the traffic sign images, with shape (num examples, width, height, channels).
- 'labels' is a 1D array containing the label/class ID of each traffic sign. The file signnames.csv contains an id -> name mapping for each ID.
- 'sizes' is a list of tuples (width, height) representing the original width and height of each image.
- 'coords' is a list of tuples (x1, y1, x2, y2) representing the coordinates of a bounding box around the sign in the image. These coordinates refer to the original images; the pickled data contains resized versions (32 by 32) of these images.

All images (features) in the dataset are already resized to 32 x 32 pixels.
The data (train.p, test.p, valid.p) must be located in the data folder one level above the notebook to run the following steps!
# Load pickled data
import pickle
import os
training_file = '../data/train.p'
validation_file = '../data/valid.p'
testing_file = '../data/test.p'
with open(training_file, mode='rb') as f:
train = pickle.load(f)
with open(validation_file, mode='rb') as f:
valid = pickle.load(f)
with open(testing_file, mode='rb') as f:
test = pickle.load(f)
X_train, y_train = train['features'], train['labels']
X_valid, y_valid = valid['features'], valid['labels']
X_test, y_test = test['features'], test['labels']
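As a quick sanity check (my own addition, not part of the original pipeline), the structure described above can be verified by printing the dictionary keys and array shapes:

# sanity check: the keys and shapes should match the description above
print(train.keys())
print('train:', X_train.shape, y_train.shape)
print('valid:', X_valid.shape, y_valid.shape)
print('test: ', X_test.shape, y_test.shape)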
Load traffic sign names from signnames.csv. The names will be needed later to display the descriptive names of the traffic signs instead of the class IDs.
import csv
import numpy as np
# load class-ids and sign names from csv file
def load_signnames_from_csv(filename):
rows = []
with open(filename) as csvfile:
reader = csv.reader(csvfile, delimiter=',')
next(reader) # skip header
for row in reader:
class_id = row[0]
sign_name = row[1]
rows.append((class_id, sign_name))
return np.array(rows)
sign_names = load_signnames_from_csv('signnames.csv')
num_classes = len(sign_names)
print('Number of classes: {}'.format(num_classes))
print()
for sign in sign_names:
print('{:4d}: {}'.format(int(sign[0]), sign[1]))
### Basic summary of the dataset, calculated with python and numpy methods
import numpy as np
# Number of training examples
n_train = len(X_train)
# Number of validation examples
n_validation = len(X_valid)
# Number of testing examples.
n_test = len(X_test)
# What's the shape of a traffic sign image?
image_shape = X_train[0].shape
# How many unique classes/labels there are in the dataset.
n_classes = len(np.unique(y_train))
print("Number of training examples =", n_train)
print("Number of testing examples =", n_test)
print("Image data shape =", image_shape)
print("Number of classes =", n_classes)
print()
print("Image Shape: {}".format(X_train[0].shape) )
First, we examine how often each class is represented in the dataset. The easiest way is a histogram of the number of images in each unique class.
import matplotlib.pyplot as plt
%matplotlib inline
# histogram of class frequency
fig, ax = plt.subplots()
hist, bins = np.histogram(y_train, bins=n_classes)
center = np.array(range(0, n_classes))
median = np.median(hist)
ax.plot(bins, np.full(len(bins), median, dtype=int), '--', color='blue')
ax.bar(center, hist, align='center', width=0.8)
ax.set_title("Number of images per class")
ax.set_xlabel('Class')
ax.set_ylabel('Number of images')
plt.text(n_classes+3, median, 'Median: {}'.format(median), color='blue')
fig.tight_layout()
plt.show()
print()
print('Median of images per class: {}'.format(median))
If the classes in the dataset are represented very unevenly, the classifier becomes biased toward the more frequent classes. This should be avoided, so each class in the training dataset should be represented by an approximately equal number of images.
The histogram above shows that the classes are represented very unevenly, which adversely affects predictive accuracy. Ideally, the weakly represented classes would be supplemented with more images until the number of images per class is about the same, but that would require many more images of traffic signs. Therefore I decided to cap the number of images per class in the training dataset at the median number of images per class.
# cut number of images per class to the median number images per class
def equalize_images_per_class(data, labels, num_classes, threshold):
images = []
classes = []
for class_id in range(0, num_classes):
group = data[labels == class_id]
if len(group) > threshold:
group = group[:threshold]
for image in group:
images.append(image)
classes.append(class_id)
return np.array(images), np.array(classes)
X_train, y_train = equalize_images_per_class(X_train, y_train, num_classes, int(median))
# histogram of class frequency
fig, ax = plt.subplots()
hist, bins = np.histogram(y_train, bins=n_classes)
center = np.array(range(0, n_classes))
ax.bar(center, hist, align='center', width=0.8)
ax.set_title("Number of images per class")
ax.set_xlabel('Class')
ax.set_ylabel('Number of images')
fig.tight_layout()
plt.show()
The distribution is still not optimal, but much better than before. Unfortunately, some classes are very poorly represented, but it makes no sense to use even fewer images, because as a rule of thumb you should have at least 1000 images per class to get a good training result.
I trained and tested the network once with and once without capping the number of images. Although I achieved better training and validation results without the capping, the network trained on the capped dataset performed better on the test set.
Next, let's look at the pictures of the dataset. The following diagram shows ten randomly selected images of each class in the training dataset.
import random
def show_dataset(X, y, sign_names, columns=10):
classes = np.unique(y)
# show image of 10 random data points
fig, axs = plt.subplots(len(classes), columns, figsize=(15, 120))
fig.subplots_adjust(hspace=0.3)
axs = axs.ravel()
for row, class_id in enumerate(classes):
group = X[y == class_id]
sign_name = sign_names[class_id]
for col in range(columns):
image = group[random.randint(0, len(group) - 1)]
index = row * columns + col
axs[index].axis('off')
axs[index].set_title(sign_name)
if len(image.shape) == 3:
axs[index].imshow(image)
else:
axs[index].imshow(image, cmap='gray')
plt.show()
show_dataset(X_train, y_train, np.unique(y_train).astype(str))
In this step I tested some preprocessing techniques like grayscaling and local histogram equalization.
I trained and tested my network with and without grayscale conversion and local histogram equalization. With grayscale images and histogram equalization I could achieve better training results, but on real images from the Internet I achieved better results with color images. So I finally decided to use color images for this project.
For many classification problems, colors play only a minor role; shapes and edges matter most. In these cases, it may be helpful to use grayscale images instead of color images. Especially with large datasets, this speeds up the training process and can also have a positive effect on the achieved accuracy.
The following diagram shows the images of the training dataset as grayscale images.
import cv2
# convert images to grayscale
# note: the pickled images are RGB, so COLOR_RGB2GRAY is the correct conversion
def to_grayscale(images):
    result = []
    for image in images:
        result.append(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY))
    return np.array(result)
train_gray = to_grayscale(X_train)
show_dataset(train_gray, y_train, np.unique(y_train).astype(str))
This technique spreads out the most frequent intensity values in an image, improving the contrast of low-contrast images.
import skimage.morphology as morp
from skimage.filters import rank
# apply local histogram equalization
def local_histogram_equalization(image):
kernel = morp.disk(30)
img_local = rank.equalize(image, selem=kernel)
return img_local
train_equalized = np.array(list(map(local_histogram_equalization, train_gray)))
show_dataset(train_equalized, y_train, np.unique(y_train).astype(str))
As noted above, despite the slightly better training results with grayscale images and histogram equalization, the network recognized real images from the Internet better with color input, so I decided to use color images for this project.
Data augmentation is a great technique for artificially enlarging a dataset by deriving new images from existing ones through random manipulations such as scaling, rotation, tilt, noise, etc.
This can be done by hand, by building an augmentation pipeline, analogous to a preprocessing pipeline, that applies the appropriate manipulations; OpenCV, for example, offers numerous functions for image manipulation (see the sketch below).
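The following is my own minimal sketch of such a hand-built step (illustration only, not part of the project pipeline): a random rotation implemented with OpenCV's getRotationMatrix2D and warpAffine.

import cv2
import numpy as np

# sketch of a hand-rolled augmentation step: rotate an image by a
# random angle around its center (illustration only, not used below)
def random_rotate(image, max_angle=15):
    h, w = image.shape[:2]
    angle = np.random.uniform(-max_angle, max_angle)
    matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
    return cv2.warpAffine(image, matrix, (w, h))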
But I prefer ready-made libraries like imgaug or Augmentor, where the augmentation pipeline is described declaratively, which keeps it very readable. For this project I use Augmentor, see https://augmentor.readthedocs.io/en/master/. Augmentor already delivers normalized images, so the normalization step in the image preprocessing can be dropped.
I use random zoom between factor 0.8 and 1.2 to simulate different distances from the camera to the signs.
import Augmentor
p = Augmentor.Pipeline()
p.zoom(probability=0.8, min_factor=0.8, max_factor=1.2)
for class_id in range(0, 10):
y = np.nonzero(y_train == class_id)[0]
x = X_train[y_train == class_id]
datagen = p.keras_generator_from_array([x[0]], [class_id], batch_size=10)
images, labels = next(datagen)
show_dataset(images, labels, np.full(len(labels), class_id, dtype=int))
I use a random rotation of +/- 15 degrees to simulate signs that appear slightly rotated.
p = Augmentor.Pipeline()
p.rotate(probability=0.8, max_left_rotation=15, max_right_rotation=15)
for class_id in range(0, 10):
y = np.nonzero(y_train == class_id)[0]
x = X_train[y_train == class_id]
datagen = p.keras_generator_from_array([x[0]], [class_id], batch_size=10)
images, labels = next(datagen)
show_dataset(images, labels, np.full(len(labels), class_id, dtype=int))
I use a random horizontal and vertical skew to simulate different camera perspectives on the traffic signs.
p = Augmentor.Pipeline()
p.skew(probability=0.8, magnitude=0.2)
for class_id in range(0, 10):
y = np.nonzero(y_train == class_id)[0]
x = X_train[y_train == class_id]
datagen = p.keras_generator_from_array([x[0]], [class_id], batch_size=10)
images, labels = next(datagen)
show_dataset(images, labels, np.full(len(labels), class_id, dtype=int))
The final augmentation pipeline for this project combines the three augmentation methods described above.
import Augmentor
p = Augmentor.Pipeline()
p.zoom(probability=0.8, min_factor=0.8, max_factor=1.2)
p.rotate(probability=0.8, max_left_rotation=15, max_right_rotation=15)
p.skew(probability=0.8, magnitude=0.2)
for class_id in range(0, 10):
y = np.nonzero(y_train == class_id)[0]
x = X_train[y_train == class_id]
datagen = p.keras_generator_from_array([x[0]], [class_id], batch_size=10)
images, labels = next(datagen)
show_dataset(images, labels, np.full(len(labels), class_id, dtype=int))
The goal of the project is to design and train a model that achieves an accuracy of 93% or greater on the validation set. In this step, I designed and tested a model architecture to achieve this goal.
Since I'm already familiar with Keras and love its streamlined API, I decided to create the project with Keras and TensorFlow as the backend.
Optimization Method
The following method makes it very easy to change the optimization method, so the network architecture can be tested with different optimizers to select the best one.
from keras.optimizers import SGD, Adam, RMSprop, Adagrad, Adadelta
def get_optimizer(optimizer_method):
    if optimizer_method == "sgd":
        return SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    if optimizer_method == "rmsprop":
        return RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    if optimizer_method == "adam":
        # alternative defaults: Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        return Adam(lr=0.001, decay=0.001 / num_epochs)
    if optimizer_method == "adagrad":
        return Adagrad(lr=0.01, epsilon=1e-08, decay=0.0)
    if optimizer_method == "adadelta":
        return Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.0)
    raise ValueError('unknown optimizer method: {}'.format(optimizer_method))
Callback Methods
I use three callback methods:
EarlyStopping: stops the training process early if there has been no improvement for several consecutive epochs.
ModelCheckpoint: saves the best model seen so far after each epoch.
ReduceLROnPlateau: automatically reduces the learning rate if there has been no improvement over several epochs.
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, ProgbarLogger
def get_callbacks(model_architecture, optimizer_method):
model_filepath = './output/traffic_signs_model_{}_{}.h5'.format(model_architecture, optimizer_method)
callbacks = [
EarlyStopping(monitor='loss', min_delta=0, patience=5, mode='auto', verbose=1),
ModelCheckpoint(model_filepath, monitor='val_loss', save_best_only=True, verbose=1),
ReduceLROnPlateau(monitor='loss', factor=0.1, patience=2, verbose=1, mode='auto', min_delta=1e-4, cooldown=0,
min_lr=0)]
return callbacks
A helper method to plot and save the training history after the training process.
def plot_train_history(H, model_architecture, optimizer_method):
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, len(H.history["loss"])), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, len(H.history["val_loss"])), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, len(H.history["acc"])), H.history["acc"], label="train_acc")
plt.plot(np.arange(0, len(H.history["val_acc"])), H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig('./output/training-loss-and-accuracy_{}_{}.png'.format(model_architecture, optimizer_method))
plt.show()
Here I scale the pixel values to the range 0.0 to 1.0. Ideally, image data is normalized so that it has zero mean and equal variance; a simple scaling to [0, 1] is a common approximation. I only do this for the validation data, because the image augmentor used for the training data already supplies normalized data.
The class labels y_train and y_valid must also be converted to one-hot labels.
import keras
# normalize data between 0.0 and 1.0
# don't normalize X_train, because this is already done by the augmentation
X_valid = X_valid.astype('float32') / 255
# convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes)
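As an aside, if one wanted true zero-mean, unit-variance inputs instead of the simple [0, 1] scaling, a standardization helper like the following sketch could be used (my own illustration, not used in this project):

# alternative (not used here): standardize images to zero mean and unit variance
def standardize(images):
    images = images.astype('float32')
    return (images - images.mean()) / images.std()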
Here I configure the hyperparameters for the training: the batch size, the maximum number of epochs, and the optimization method.
I tried the three optimization methods sgd, adam, and rmsprop. With rmsprop I got significantly better results than with the others, so I finally use the rmsprop optimization method.
# hyperparameter for training
optimizer_method = 'rmsprop'
batch_size = 256
num_epochs = 100
LeNet-5 is a convolutional network designed for handwritten and machine-printed character recognition. It was introduced by Yann LeCun in his 1998 paper Gradient-Based Learning Applied to Document Recognition. We can also use the LeNet architecture to classify traffic signs.
LeNet Architecture:

All we need to do is change the input shape from (32, 32, 1) to (32, 32, 3), because we use color images instead of grayscale images. The output size must also be changed from 10 to 43 classes.
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.core import Dropout
# LeNet model architecture
class LeNet:
    @staticmethod
    def build(num_classes):
        model = Sequential()
        # Layer 1
        # Conv Layer 1 => 28x28x6
        model.add(Conv2D(filters=6, kernel_size=5, strides=1, activation='relu', input_shape=(32, 32, 3)))
        # Layer 2
        # Pooling Layer 1 => 14x14x6
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # Layer 3
        # Conv Layer 2 => 10x10x16 (input_shape is only needed on the first layer)
        model.add(Conv2D(filters=16, kernel_size=5, strides=1, activation='relu'))
        # Layer 4
        # Pooling Layer 2 => 5x5x16
        model.add(MaxPooling2D(pool_size=2, strides=2))
        # Flatten
        model.add(Flatten())
        # Layer 5
        # Fully connected layer 1 => 120x1
        model.add(Dense(units=120, activation='relu'))
        model.add(Dropout(0.5))
        # Layer 6
        # Fully connected layer 2 => 84x1
        model.add(Dense(units=84, activation='relu'))
        model.add(Dropout(0.5))
        # Output Layer => num_classes x 1
        model.add(Dense(units=num_classes, activation='softmax'))
        # show and return the constructed network architecture
        model.summary()
        return model
Train LeNet Model
The fit_generator() method consumes the data generator datagen created by the augmentation pipeline p, which provides freshly augmented images for each batch. Because the generator draws random samples, the training data is effectively shuffled as well. After training, the training history is plotted and saved as a diagram.
model_architecture = 'lenet'
# image augmentation
datagen = p.keras_generator_from_array(X_train, y_train, batch_size=batch_size)
# build LeNet model
lenet_model = LeNet.build(num_classes)
# the function to optimize is the cross entropy between the true label and the output (softmax) of the model
lenet_model.compile(optimizer=get_optimizer(optimizer_method), loss='categorical_crossentropy', metrics=['accuracy'])
# train model
H = lenet_model.fit_generator(datagen,
validation_data=(X_valid, y_valid),
steps_per_epoch=len(X_train) // batch_size,
callbacks=get_callbacks(model_architecture, optimizer_method),
epochs=num_epochs,
verbose=2)
# plot and save the training loss and accuracy
plot_train_history(H, model_architecture, optimizer_method)
We reached a maximum validation accuracy of 91.25% (last saved model). From about epoch 55 on, there was hardly any improvement.
Evaluate LeNet Model
Here I evaluate the trained LeNet model against the test data X_test and y_test. The test data also comes from the German Traffic Sign Dataset, but the network has never seen it before.
The evaluation calculates the loss and accuracy of the trained model. In addition, some test records are listed with their ground-truth and predicted labels.
from keras.models import load_model
model_architecture = 'lenet'
with open(testing_file, mode='rb') as f:
test = pickle.load(f)
X_test, y_test = test['features'], test['labels']
# convert class vector to binary class matrix.
y_test = keras.utils.to_categorical(y_test, num_classes)
# normalize data between 0.0 and 1.0
X_test = X_test.astype('float32') / 255
# load trained model
lenet_model = load_model('./output/traffic_signs_model_{}_{}.h5'.format(model_architecture, optimizer_method))
print()
# print loss and accuracy of the trained model
loss, acc = lenet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose=2)
print('Loss: {:.4f}'.format(loss))
print('Accuracy: {:.2f}%'.format(acc * 100))
print()
# show the true and the predicted classes for a couple of items of the test dataset
y_pred = lenet_model.predict(X_test)
start = 110
count = 20
for i, (y_t, y_p) in enumerate(zip(y_test[start:start + count], y_pred[start:start + count])):
print("{:4d} : True={: <2} Predicted={: <2} {}"
.format(i + start, y_t.argmax(axis=-1), y_p.argmax(axis=-1),
y_t.argmax(axis=-1) == y_p.argmax(axis=-1)))
The evaluation of the LeNet model with the test data resulted in a test accuracy of 88.84%.
The result is not bad, but the goal of a minimum accuracy of 93% is not reached!
The VGG network architecture was introduced by Simonyan and Zisserman in their 2014 paper, Very Deep Convolutional Networks for Large Scale Image Recognition. It is one of the highest performing Convolutional Neural Networks on the ImageNet challenge over the past few years.
VGGNet Architecture:

This network is characterized by its simplicity, using only 3×3 convolutional layers stacked on top of each other in increasing depth. Reducing volume size is handled by max pooling. Two fully-connected layers, each with 4,096 nodes, are then followed by a softmax classifier.
In order to improve on the weak result obtained with the LeNet model, I use a variant of the VGGNet architecture from the book Deep Learning for Computer Vision by Adrian Rosebrock, called MiniVGGNet.
MiniVGGNet Architecture:

To use the MiniVGGNet architecture described above for traffic sign classification, the size of the last fully connected layer must be changed from 10 to 43 classes.
from keras.layers import Activation, BatchNormalization
class MiniVGGNet:
@staticmethod
def build(num_classes):
model = Sequential()
chanDim = -1
# first CONV => RELU => CONV => RELU => POOL layer set
model.add(Conv2D(32, (3, 3), padding="same", input_shape=(32, 32, 3)))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(32, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# second CONV => RELU => CONV => RELU => POOL layer set
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# first (and only) set of FC => RELU layers
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# softmax classifier
model.add(Dense(num_classes))
model.add(Activation("softmax"))
# show and return the constructed network architecture
model.summary()
return model
Train MiniVGGNet Model
The training process is the same as described above; only the model construction changed from the LeNet to the MiniVGGNet model.
model_architecture = 'vggnet'
# image augmentation
datagen = p.keras_generator_from_array(X_train, y_train, batch_size=batch_size)
# build MiniVGGNet model
vggnet_model = MiniVGGNet.build(num_classes)
# the function to optimize is the cross entropy between the true label and the output (softmax) of the model
vggnet_model.compile(optimizer=get_optimizer(optimizer_method), loss='categorical_crossentropy', metrics=['accuracy'])
# train model
H = vggnet_model.fit_generator(datagen,
validation_data=(X_valid, y_valid),
steps_per_epoch=len(X_train) // batch_size,
callbacks=get_callbacks(model_architecture, optimizer_method),
epochs=num_epochs,
verbose=2)
# plot and save the training loss and accuracy
plot_train_history(H, model_architecture, optimizer_method)
We reached a maximum validation accuracy of 97.30% (last saved model).
Evaluate MiniVGGNet Model
The evaluation is the same as already described above.
import keras
from keras.models import load_model
model_architecture = 'vggnet'
with open(testing_file, mode='rb') as f:
test = pickle.load(f)
X_test, y_test = test['features'], test['labels']
# convert class vector to binary class matrix.
y_test = keras.utils.to_categorical(y_test, num_classes)
# normalize data between 0.0 and 1.0
X_test = X_test.astype('float32') / 255
# load trained model
vggnet_model = load_model('./output/traffic_signs_model_{}_{}.h5'.format(model_architecture, optimizer_method))
print()
# print loss and accuracy of the trained model
loss, acc = vggnet_model.evaluate(X_test, y_test, batch_size=batch_size, verbose=2)
print('Loss: {:.4f}'.format(loss))
print('Accuracy: {:.2f}%'.format(acc * 100))
print()
# show the true and the predicted classes for a couple of items of the test dataset
y_pred = vggnet_model.predict(X_test)
start = 110
count = 20
for i, (y_t, y_p) in enumerate(zip(y_test[start:start + count], y_pred[start:start + count])):
print("{:4d} : True={: <2} Predicted={: <2} {}"
.format(i + start, y_t.argmax(axis=-1), y_p.argmax(axis=-1),
y_t.argmax(axis=-1) == y_p.argmax(axis=-1)))
The evaluation of the MiniVGGNet model on the test data resulted in a test accuracy of 94.66%.
Further improvement could probably be achieved by tuning the hyperparameters and improving the data preprocessing, but the target of 93% has been reached!
Confusion Matrix
A confusion matrix is a summary of the prediction results on a classification problem.
The numbers of correct and incorrect predictions are summarized as counts and broken down by class. The confusion matrix shows where the classification model gets confused when it makes predictions.
Here is the confusion matrix of the trained MiniVGGNet model, calculated on the test data. It shows that the model has very few outliers.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] # Normalize
plt.figure(figsize=(20, 10))
plt.ylabel('True class')
plt.xlabel('Predicted class')
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.colorbar()
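To see which specific class pairs are confused, it can help to list the largest off-diagonal entries of the normalized matrix. A small sketch of my own, based on the cm and sign_names computed above:

# list the five most confused class pairs (largest off-diagonal entries)
errors = cm.copy()
np.fill_diagonal(errors, 0)
top = np.argsort(errors, axis=None)[-5:]
for true_id, pred_id in zip(*np.unravel_index(top, errors.shape)):
    print('{} misclassified as {}: {:.1%}'.format(
        sign_names[true_id][1], sign_names[pred_id][1], errors[true_id, pred_id]))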
In this step I test my model with some images of German traffic signs from the web.
I collected 20 images of German traffic signs from the Internet and stored them in the directory test_images.
import glob
import cv2
# show test images
filenames = glob.glob('./test_images/*.jpg')
num_files = int(len(filenames))
cols = 5
rows = int(num_files / cols)
if num_files % cols > 0:
rows += 1
fig, axs = plt.subplots(rows, cols, figsize=(20, 15))
axs = axs.ravel()
for i, filename in enumerate(filenames):
image = cv2.imread(filename)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
axs[i].axis('off')
axs[i].imshow(image)
axs[i].set_title(filename)
plt.show()
Here I predict the classes for the 20 test images and output them together with the names of the traffic signs.
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import load_model
# read and preprocess test images
original_images = []
X_test = []
filenames = glob.glob('./test_images/*.jpg')
for filename in filenames:
image = cv2.imread(filename)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
original_images.append(image)
resized_image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_AREA)
X_test.append(resized_image)
X_test = np.array(X_test)
# normalize data between 0.0 and 1.0
X_test = X_test.astype('float32') / 255
# load trained vggnet model
model = load_model('./output/traffic_signs_model_{}_{}.h5'.format('vggnet', 'rmsprop'))  # the MiniVGGNet model trained with rmsprop above
# predict
y_pred = model.predict(X_test)
# show test images with class predictions
num_files = int(len(filenames))
cols = 5
rows = int(num_files / cols)
if num_files % cols > 0:
rows += 1
fig, axs = plt.subplots(rows, cols, figsize=(20, 15))
axs = axs.ravel()
for i, (filename, image, org_image) in enumerate(zip(filenames, X_test, original_images)):
class_id = y_pred.argmax(axis=-1)[i]
class_name = sign_names[class_id][1]
axs[i].axis('off')
axs[i].imshow(org_image)
axs[i].set_title('{}: {}'.format(class_id, class_name))
plt.show()
Even when testing with new images from the Web, the trained model achieved good results. Only one of the 20 new images was classified incorrectly.
### Calculate the accuracy for these 20 new images.
### For example, if the model predicted 10 of the 20 signs correctly, it would be 50% accurate on these new images.
y_true = np.array([25, 17, 28, 1, 23, 22, 33, 14, 7, 18, 26, 1, 4, 11, 13, 12, 12, 13, 25, 36])
print("True: " + str(y_true))
print("Predicted: " + str(y_pred.argmax(axis=-1)))
test_accuracy = sum(y_true == y_pred.argmax(axis=-1))/len(y_true)
print("Test Accuracy = {:.1f}%".format(test_accuracy*100))
With an accuracy of 95%, the test accuracy reported above (94.66%) is confirmed!
It is noticeable that the misrecognized image belongs to class 23, which is one of the classes underrepresented in the training data.
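This can be verified by counting the class frequencies in the original training labels; a quick check of my own, reloading from the pickle file because y_train was one-hot encoded and capped above:

# count the images per class in the original (uncapped) training set
with open(training_file, mode='rb') as f:
    original_labels = pickle.load(f)['labels']
class_counts = np.bincount(original_labels)
print('Images of class 23 in the original training set:', class_counts[23])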
For each of the new images, I print out the model's softmax probabilities to show the certainty of the model's predictions (limited to the top 5 probabilities per image).
### Print out the top five softmax probabilities for the predictions on the German traffic sign images found on the web.
k = 5
n = len(filenames)
plt.figure(figsize=(15, 50))
plt.subplots_adjust(hspace=0.5)
for i, (filename, prob, org_image) in enumerate(zip(filenames, y_pred, original_images)):
top_values_index = np.argsort(prob)[-k:]  # indices of the k highest probabilities, in ascending order
class_id = prob.argmax(axis=-1)
class_name = sign_names[class_id][1]
plt.subplot(n, 2, 2 * i + 1)
plt.imshow(original_images[i])
plt.title(filename)
plt.axis('off')
plt.subplot(n, 2, 2 * i + 2)
plt.barh(np.arange(1, 6, 1), prob[top_values_index])
labels = np.array([sign_names[j] for j in top_values_index])
plt.yticks(np.arange(1, 6, 1), labels[:, 1])
plt.show()
The diagram above shows that most classes are clearly recognized. Only for "Speed limit (100km/h)" are the top 5 predictions close together. But that is also because the dataset contains several classes that are very similar.
To get a better understanding of how the network sees an image and which features lead to a classification, it is useful to visualize the activation maps of the various network layers. The following example visualizes the activation maps for one of the test images. For this I used code snippets from https://github.com/philipperemy/keras-activations.
import keras.backend as K
import numpy as np
import matplotlib.pyplot as plt
def get_activations(model, model_inputs):
outputs = [layer.output for layer in model.layers]
funcs = [K.function([model.input] + [K.learning_phase()], [out]) for out in outputs] # evaluation functions
list_inputs = [[model_inputs], 0.]
activations = [func(list_inputs)[0] for func in funcs]
layer_names = [output.name for output in outputs]
result = dict(zip(layer_names, activations))
return result
def display_activations(activations):
layer_names = list(activations.keys())
activation_maps = list(activations.values())
batch_size = activation_maps[0].shape[0]
for i, activation_map in enumerate(activation_maps):
print('Activation map {}'.format(i))
shape = activation_map.shape
if len(shape) == 4:
activations = np.hstack(np.transpose(activation_map[0], (2, 0, 1)))
elif len(shape) == 2:
# try to make it square as much as possible. we can skip some activations.
activations = activation_map[0]
num_activations = len(activations)
if num_activations > 1024: # too hard to display it on the screen.
square_param = int(np.floor(np.sqrt(num_activations)))
activations = activations[0: square_param * square_param]
activations = np.reshape(activations, (square_param, square_param))
else:
activations = np.expand_dims(activations, axis=0)
else:
raise Exception('len(shape) = 3 has not been implemented.')
fig, ax = plt.subplots(figsize=(30, 30))
plt.title(layer_names[i])
ax.imshow(activations, interpolation='None', cmap='viridis')
plt.show()
# image to visualize
test_image = X_test[0]
plt.imshow(test_image)
activations = get_activations(model, test_image)
display_activations(activations)